Set up by loading libraries and data sets
install.packages("plotly")
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6 ✔ purrr 0.3.4
## ✔ tibble 3.1.8 ✔ dplyr 1.0.9
## ✔ tidyr 1.2.0 ✔ stringr 1.4.0
## ✔ readr 2.1.2 ✔ forcats 0.5.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(lterdatasampler)
library(plotly)
##
## Attaching package: 'plotly'
##
## The following object is masked from 'package:ggplot2':
##
## last_plot
##
## The following object is masked from 'package:stats':
##
## filter
##
## The following object is masked from 'package:graphics':
##
## layout
library(ggfortify)
# load crab data
data("pie_crab")
inspect the data
pie_crab
## # A tibble: 392 × 9
## date latitude site size air_temp air_temp_sd water_temp water…¹ name
## <date> <dbl> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 2016-07-24 30 GTM 12.4 21.8 6.39 24.5 6.12 Guan…
## 2 2016-07-24 30 GTM 14.2 21.8 6.39 24.5 6.12 Guan…
## 3 2016-07-24 30 GTM 14.5 21.8 6.39 24.5 6.12 Guan…
## 4 2016-07-24 30 GTM 12.9 21.8 6.39 24.5 6.12 Guan…
## 5 2016-07-24 30 GTM 12.4 21.8 6.39 24.5 6.12 Guan…
## 6 2016-07-24 30 GTM 13.0 21.8 6.39 24.5 6.12 Guan…
## 7 2016-07-24 30 GTM 10.3 21.8 6.39 24.5 6.12 Guan…
## 8 2016-07-24 30 GTM 11.2 21.8 6.39 24.5 6.12 Guan…
## 9 2016-07-24 30 GTM 12.7 21.8 6.39 24.5 6.12 Guan…
## 10 2016-07-24 30 GTM 14.6 21.8 6.39 24.5 6.12 Guan…
## # … with 382 more rows, and abbreviated variable name ¹​water_temp_sd
## # ℹ Use `print(n = ...)` to see more rows
Look at variable correlations
pie_crab %>%
select(latitude, air_temp:water_temp_sd) %>%
cor()
## latitude air_temp air_temp_sd water_temp water_temp_sd
## latitude 1.00000000 -0.99497146 0.7932130 -0.9571738 0.04188273
## air_temp -0.99497146 1.00000000 -0.7780397 0.9632287 -0.04532179
## air_temp_sd 0.79321301 -0.77803972 1.0000000 -0.7879147 0.40970338
## water_temp -0.95717376 0.96322867 -0.7879147 1.0000000 -0.11480905
## water_temp_sd 0.04188273 -0.04532179 0.4097034 -0.1148090 1.00000000
Conduct the PCA test
crab_pca <- pie_crab %>%
select(latitude, air_temp:water_temp_sd) %>%
prcomp()
Look at variance contributions
summary(crab_pca)
## Importance of components:
## PC1 PC2 PC3 PC4 PC5
## Standard deviation 6.6533 1.21857 0.78276 0.45905 0.25065
## Proportion of Variance 0.9492 0.03184 0.01314 0.00452 0.00135
## Cumulative Proportion 0.9492 0.98100 0.99413 0.99865 1.00000
Look at variable loadings
crab_pca$rotation
## PC1 PC2 PC3 PC4 PC5
## latitude 0.63779536 -0.1456839 0.4171761 -0.06536301 0.62744330
## air_temp -0.56802027 0.1189354 -0.2222182 -0.30713175 0.72076107
## air_temp_sd 0.11790786 0.3206932 0.2023075 -0.87678588 -0.27124121
## water_temp -0.50648072 -0.2207416 0.8270560 0.06654101 -0.07937944
## water_temp_sd 0.01204459 0.9016982 0.2272295 0.35807342 0.08333967
Look at site/observation scores
cor(crab_pca$x)
## PC1 PC2 PC3 PC4 PC5
## PC1 1.000000e+00 1.282922e-15 -5.203544e-15 -2.758252e-14 1.143021e-13
## PC2 1.282922e-15 1.000000e+00 4.326717e-16 -9.482459e-16 7.313997e-16
## PC3 -5.203544e-15 4.326717e-16 1.000000e+00 -1.202820e-15 5.418838e-15
## PC4 -2.758252e-14 -9.482459e-16 -1.202820e-15 1.000000e+00 -8.592028e-15
## PC5 1.143021e-13 7.313997e-16 5.418838e-15 -8.592028e-15 1.000000e+00
Make screeplot of variable contributions
screeplot(crab_pca)
Make a biplot
biplot(crab_pca)
Make a ggplot biplot (and make it interactive)
ggplotly(autoplot(crab_pca, loadings = TRUE, loadings.label = TRUE) +
theme_minimal())
Add PC axes to pie_crab data
pie_crab <- bind_cols(pie_crab, crab_pca$x)
Conduct the multiple linear regression
model <- lm(size ~ PC1 + PC2 + PC3 + PC4 + PC5, data = pie_crab)
summary(model)
##
## Call:
## lm(formula = size ~ PC1 + PC2 + PC3 + PC4 + PC5, data = pie_crab)
##
## Residuals:
## Min 1Q Median 3Q Max
## -8.4215 -1.7999 -0.0104 1.7884 7.8223
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 14.65801 0.13517 108.443 < 2e-16 ***
## PC1 0.30758 0.02034 15.120 < 2e-16 ***
## PC2 -0.15108 0.11106 -1.360 0.17453
## PC3 0.82621 0.17290 4.778 2.51e-06 ***
## PC4 0.83790 0.29483 2.842 0.00472 **
## PC5 -2.56991 0.53996 -4.759 2.75e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.676 on 386 degrees of freedom
## Multiple R-squared: 0.4239, Adjusted R-squared: 0.4165
## F-statistic: 56.81 on 5 and 386 DF, p-value: < 2.2e-16